package top.choviwu.garbage.sort.util;

import com.google.common.collect.Lists;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;
import java.text.MessageFormat;
import java.util.List;

/**
 * Created by ChoviWu on 2017/12/20
 * Description : Utility that crawls a fixed list of web pages and collects their paragraph text.
 */

@Slf4j
public class SpriderUtils {

    /**
     * User-agent string of a desktop Chrome browser on macOS.
     * It is not referenced inside this class; presumably other classes in the package read it
     * (TODO confirm).
     */
    static String agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36";

    /** Pages visited by {@link #getHtml()}, in array order. */
    static String[] urls = {
            "https://www.guaze.com/juzi/19095.html"
    };

    /**
     * Targeted crawl: visits every page listed in {@link #urls} and gathers paragraph text.
     *
     * <p>For each page, every {@code <p>} element found under an element with CSS class
     * {@code content} is inspected. Its text is kept unless it is blank or contains the
     * substring {@code "http"}.
     *
     * @return the retained paragraph texts, accumulated page by page in the order the elements
     *         were returned by the parser; never {@code null}, empty when nothing matched
     */
    public static List<String> getHtml() {
        List<String> list = Lists.newArrayList();
        for (String url : urls) {
            log.info(">>>>>>>Url :{}, ", url);
            Document doc = CookieUtils.parseUrl(url);
            if (doc == null) {
                // NOTE(review): CookieUtils is defined elsewhere, so its failure contract is not
                // visible here. If it reports a failed fetch by returning null, skip this page
                // instead of aborting the whole crawl with a NullPointerException.
                log.warn("No document returned for url: {}", url);
                continue;
            }
            doc.getElementsByClass("content").stream()
                    .flatMap(content -> content.getElementsByTag("p").stream())
                    // Extract the text once per paragraph; it was previously re-evaluated for
                    // each check and again when adding to the result.
                    .map(paragraph -> paragraph.text())
                    .filter(text -> StringUtils.isNotBlank(text) && !text.contains("http"))
                    .forEach(list::add);
        }
        return list;
    }

    /**
     * Manual entry point: runs the crawl once and discards the result.
     *
     * @param args ignored
     * @throws IOException kept in the signature for compatibility with existing callers
     */
    public static void main(String[] args) throws IOException {
        getHtml();
    }
}